function [] = calc_ratio(sumdir,lsort)
%CALC_RATIO Calculate KO/WT ratios from a collated directory structure.
%This function works on the syn_collated file structure after ur_analysis
%has created the sum_collated directory structure. For every data set
%directory found in the selected root it reads the 'ave*' and 'med*' files
%in
%       <root>\<set>\{KO,WT}\{Layer4,Layer5}\aprop\<sumdir>
%and writes the element-wise KO/WT ratio of each file pair, plus a collated
%table holding the row means of those ratios (one column per data set), to
%       <root>\ratios\prop<N>
%Syntax:    calc_ratio();
%           calc_ratio(sumdir);
%           calc_ratio(sumdir,lsort);
%Input:     you will be prompted to point to the root of the directory
%               structure.
%           sumdir = the directory the ratio is calculated in, which is
%               'sum_collated' by default.
%           lsort = sort by label, by default it is off = 0.  This is for
%               when the WT and KO arrays are out of order or not the same
%               size, as happens in the fenobam set.
%Output:    none (results are written to disk with sav2csv)
%Notes:     - Only sub-directories of the root are processed; plain files
%             and the 'ratios' output directory of a previous run are
%             skipped.
%           - Infinite ratios (+Inf and -Inf) are stored as NaN.
%           - Relies on uigetdir2, sav2csv, dataset and nanmean being on
%             the path.

dir_root = uigetdir2('','Directory where the files are located');    %get the directory
if isequal(dir_root,0)  %assumes uigetdir2 returns 0 on cancel, like uigetdir - TODO confirm
    return
end

if nargin<1||isempty(sumdir)
    sumdir = 'sum_collated';
end
if nargin<2||isempty(lsort)
    lsort = 0;
end

%collect the data set directories: keep only real sub-directories and skip
%'.', '..' and our own output directory so the function can be re-run
dir_lv1 = dir(dir_root);    %get the file structure of the root dir.
dir_lv1 = dir_lv1([dir_lv1.isdir]);
set_names = {dir_lv1.name};
set_names = set_names(~ismember(set_names,{'.','..','ratios'}));
if isempty(set_names)
    warning('calc_ratio:noData','No data set directories found in %s',dir_root);
    return
end

out_root = [dir_root,filesep,'ratios',filesep];   %make the output directory structure root
if ~exist(out_root,'dir')
    mkdir(out_root);         %create the output root.
end

%now lets crank through the directories
col_label = set_names;      %column labels of the collated data, one per data set
for n = 1:numel(set_names)  %level 1: root of the syn_collated, ur_analyzed directory
    set_name = set_names{n};
    dir_tmp = [dir_root,filesep,set_name,filesep]; %the current level 2 directory
    %now we make lots of assumptions and do the calculations: expected
    %structure from here: dir_tmp\ko\layer\aprop\sum_collated
    kol4path = fullfile(dir_tmp,'KO','Layer4','aprop',sumdir);  %KO layer4
    kol5path = fullfile(dir_tmp,'KO','Layer5','aprop',sumdir);  %KO layer5
    wtl4path = fullfile(dir_tmp,'WT','Layer4','aprop',sumdir);  %WT layer4
    wtl5path = fullfile(dir_tmp,'WT','Layer5','aprop',sumdir);  %WT layer5
    %get the files we want from each of these directories
    [kol4ave,kol4med] = process_files(dir(kol4path));     %average and median data
    [kol5ave,kol5med] = process_files(dir(kol5path));     %average and median data
    [wtl4ave,wtl4med] = process_files(dir(wtl4path));     %average and median data
    [wtl5ave,wtl5med] = process_files(dir(wtl5path));     %average and median data
    %generate the labels for each file from the first KO average file.
    %These are kept untouched so that every property file is sorted
    %against the labels of the file columns, not against the output of a
    %previous sort.
    [l4ko_label,obs_label] = grab_labels(fullfile(kol4path,kol4ave{1}));
    l5ko_label = grab_labels(fullfile(kol5path,kol5ave{1}));
    col_obs_label = obs_label;
    %now lets do the calculations
    for j = 1:size(kol4ave,2)       %everything should be the same size
        %read files
        kol4avedata = read_sum_csv(kol4path,kol4ave{j});
        kol4meddata = read_sum_csv(kol4path,kol4med{j});
        kol5avedata = read_sum_csv(kol5path,kol5ave{j});
        kol5meddata = read_sum_csv(kol5path,kol5med{j});
        wtl4avedata = read_sum_csv(wtl4path,wtl4ave{j});
        wtl4meddata = read_sum_csv(wtl4path,wtl4med{j});
        wtl5avedata = read_sum_csv(wtl5path,wtl5ave{j});
        wtl5meddata = read_sum_csv(wtl5path,wtl5med{j});
        %labels written with this property; replaced below when sorting
        l4ch_label = l4ko_label;
        l5ch_label = l5ko_label;
        %sort the data if desired - this gets complicated
        if lsort    %we are going to assume correspondence across the same set meaning L4 and L5
            l4wt_label = grab_labels(fullfile(wtl4path,wtl4ave{j}));
            l5wt_label = grab_labels(fullfile(wtl5path,wtl5ave{j}));
            [kol4avedata,wtl4avedata,l4ch_label] = labelsort(l4ko_label,kol4avedata,l4wt_label,wtl4avedata);  %average data sorted
            [kol4meddata,wtl4meddata] = labelsort(l4ko_label,kol4meddata,l4wt_label,wtl4meddata);  %median data sorted
            [kol5avedata,wtl5avedata,l5ch_label] = labelsort(l5ko_label,kol5avedata,l5wt_label,wtl5avedata);  %average data sorted
            [kol5meddata,wtl5meddata] = labelsort(l5ko_label,kol5meddata,l5wt_label,wtl5meddata);  %median data sorted
        end
        %ratios: the row count of the KO layer 4 average data drives all
        %four tables (again lets hope everything is the same size)
        nrow = size(kol4avedata,1);
        l4averatio = ko_wt_ratio(kol4avedata,wtl4avedata,nrow);     %layer 4 ave ratios
        l4medratio = ko_wt_ratio(kol4meddata,wtl4meddata,nrow);     %layer 4 med ratios
        l5averatio = ko_wt_ratio(kol5avedata,wtl5avedata,nrow);     %layer 5 ave ratios
        l5medratio = ko_wt_ratio(kol5meddata,wtl5meddata,nrow);     %layer 5 med ratios
        %create the output directory for this property if it is missing
        prop_name = ['prop',num2str(j)];
        prop_dir = [out_root,prop_name];
        if ~exist(prop_dir,'dir')
            mkdir(out_root,prop_name)
        end
        %save out the data files
        l4ardata = dataset({l4averatio,l4ch_label{:}},'obsname',obs_label);
        sav2csv(l4ardata,[set_name,'_l4_ave_prop',num2str(j),'.csv'],prop_dir);
        l4mrdata = dataset({l4medratio,l4ch_label{:}},'obsname',obs_label);
        sav2csv(l4mrdata,[set_name,'_l4_med_prop',num2str(j),'.csv'],prop_dir);
        l5ardata = dataset({l5averatio,l5ch_label{:}},'obsname',obs_label);
        sav2csv(l5ardata,[set_name,'_l5_ave_prop',num2str(j),'.csv'],prop_dir);
        l5mrdata = dataset({l5medratio,l5ch_label{:}},'obsname',obs_label);
        sav2csv(l5mrdata,[set_name,'_l5_med_prop',num2str(j),'.csv'],prop_dir);
        %create a collated version for further export ease: only use the
        %average for now, std, etc. can come later if needed
        %OK this is kludgy, but we have to handle Inh cases: for data sets
        %whose name ends in 'GAD' drop the rows labelled 'aprop_PSD-*',
        %which do not overlap with the excitatory channels
        if numel(set_name)>=3 && strcmp(set_name(end-2:end),'GAD')
            rm_idx = strncmp(obs_label,'aprop_PSD-',10);  %find the items we want to remove
            l4averatio(rm_idx,:) = [];      %gone
            l4medratio(rm_idx,:) = [];
            l5averatio(rm_idx,:) = [];      %gone
            l5medratio(rm_idx,:) = [];
            if j==1
                col_obs_label(rm_idx,:) = [];   %just in case this is the last
            end
        end
        %collate the row means, ignoring NaN
        l4arcollate(:,n,j) = nanmean(l4averatio,2);
        l4mrcollate(:,n,j) = nanmean(l4medratio,2);
        l5arcollate(:,n,j) = nanmean(l5averatio,2);
        l5mrcollate(:,n,j) = nanmean(l5medratio,2);
    end
end
%save out the collated data files
for m = 1:size(l4arcollate,3)   %go through each property set
    prop_dir = [out_root,'prop',num2str(m)];
    l4arcoldata = dataset({l4arcollate(:,:,m),col_label{:}},'obsname',col_obs_label);
    sav2csv(l4arcoldata,['collated_l4_ave_prop',num2str(m),'.csv'],prop_dir);
    l4mrcoldata = dataset({l4mrcollate(:,:,m),col_label{:}},'obsname',col_obs_label);
    sav2csv(l4mrcoldata,['collated_l4_med_prop',num2str(m),'.csv'],prop_dir);
    l5arcoldata = dataset({l5arcollate(:,:,m),col_label{:}},'obsname',col_obs_label);
    sav2csv(l5arcoldata,['collated_l5_ave_prop',num2str(m),'.csv'],prop_dir);
    l5mrcoldata = dataset({l5mrcollate(:,:,m),col_label{:}},'obsname',col_obs_label);
    sav2csv(l5mrcoldata,['collated_l5_med_prop',num2str(m),'.csv'],prop_dir);
end
%--------------------------------------------------------------------------
%subfunction to read the numeric body of one collated csv file, skipping
%the header row and the label column
function data = read_sum_csv(dirpath,filename)
data = single(dlmread(fullfile(dirpath,filename),',',1,1));
%--------------------------------------------------------------------------
%subfunction to compute the element-wise KO/WT ratio of the first nrow rows,
%with infinite results (division by zero) replaced by NaN
function ratio = ko_wt_ratio(ko,wt,nrow)
ratio = ko(1:nrow,:)./wt(1:nrow,:);
ratio(isinf(ratio)) = nan;   %make sure we don't generate any +/-inf cells
%--------------------------------------------------------------------------
%subfunction to resort the dataset and even out uneven datasets
function [ko_tmp,wt_tmp,ch_tmp] = labelsort(ko_label,ko_data,wt_label,wt_data)
%Reorder the columns of the KO and WT data so that columns with the same
%label line up, and even out data sets with a different number of columns.
%Input:     ko_label, wt_label = row cell arrays of column labels
%           ko_data, wt_data   = data matrices, one column per label
%Output:    ko_tmp, wt_tmp = data with the matched columns first (in the
%               order of the longer label list, KO on a tie), followed by
%               the unmatched columns.  The shorter unmatched set is
%               padded in front with NaN columns so both outputs have the
%               same number of columns.
%           ch_tmp = labels of the output columns: the shared label for
%               matched columns, and the KO and WT labels joined together
%               for unmatched columns ('null' stands in for a padding
%               column).

ko_longer = size(ko_label,2)>=size(wt_label,2);   %which label list drives the matching
if ko_longer
    drv_label = ko_label;
    oth_label = wt_label;
else
    drv_label = wt_label;
    oth_label = ko_label;
end

%start from empty outputs so that "no match at all" is handled as well
ko_tmp = ko_data(:,[]);
wt_tmp = wt_data(:,[]);
ch_tmp = cell(1,0);
kousedidx = [];
wtusedidx = [];
pidx = 1; %initiate the place idx
for l = 1:size(drv_label,2)
    midx = find(strcmp(oth_label,drv_label{l}),1);    %first matching column, if any
    if isempty(midx)
        continue
    end
    if ko_longer
        kidx = l;
        widx = midx;
    else
        kidx = midx;
        widx = l;
    end
    ch_tmp(pidx) = drv_label(l);        %reconstruct the channel label
    ko_tmp(:,pidx) = ko_data(:,kidx);   %put in the data
    wt_tmp(:,pidx) = wt_data(:,widx);   %place in the matching data
    kousedidx(pidx) = kidx;             %mark the columns as used
    wtusedidx(pidx) = widx;
    pidx = pidx+1;  %iterate
end
%now deal with the unmatched cases
wt_data(:,wtusedidx) = [];  %remove the used data
ko_data(:,kousedidx) = [];
wt_label(:,wtusedidx) = [];
ko_label(:,kousedidx) = [];
%pad the shorter residual in front so both have the same number of columns
npad = size(ko_data,2)-size(wt_data,2);
if npad>0       %ko has more unmatched columns
    wt_data = horzcat(nan(size(wt_data,1),npad),wt_data);
    wt_label = horzcat(repmat({'null'},1,npad),wt_label);   %one label per padding column
elseif npad<0   %wt has more unmatched columns
    ko_data = horzcat(nan(size(ko_data,1),-npad),ko_data);
    ko_label = horzcat(repmat({'null'},1,-npad),ko_label);
end
%recapitulate arrays
wt_tmp = horzcat(wt_tmp,wt_data);
ko_tmp = horzcat(ko_tmp,ko_data);
%combine the residual labels and append; skipped when nothing is left over
%so no stray empty label is added
if ~isempty(ko_label)
    ch_tmp = horzcat(ch_tmp,cellstr([char(ko_label) char(wt_label)])');
end

%--------------------------------------------------------------------------
%subfunction to grab the column labels, or experiments of the dataset
function [ch_label,obs_label] = grab_labels(filepath)
%Read the row and column labels of a comma separated data file.
%Input:     filepath = full path of the csv file
%Output:    ch_label = row cell array with the entries of the first line,
%               excluding the first column and any empty entries
%           obs_label = column cell array with the entries of the first
%               column, excluding the first line
%Every field is read as text.  At least 7 fields per line are requested;
%more are requested when the first line contains more comma separated
%entries.
fid = fopen(filepath);
if fid==-1
    error('calc_ratio:grab_labels:fileOpen','Unable to open file: %s',filepath);
end
closer = onCleanup(@() fclose(fid));    %close the file even if the parsing errors out
%size the format from the header line, never going below 7 fields
ncol = 7;
header = fgetl(fid);
if ischar(header)   %fgetl returns -1 for an empty file
    ncol = max(ncol,numel(strfind(header,','))+1);
end
frewind(fid);
filedata = textscan(fid,strtrim(repmat('%s ',1,ncol)),'Delimiter',',');
if isempty(filedata{1})
    error('calc_ratio:grab_labels:emptyFile','No labels found in file: %s',filepath);
end
obs_label = filedata{1,1}(2:end,1);     %first column below the header line
ch_label = cell(1,ncol-1);
for c = 2:ncol
    ch_label{c-1} = filedata{c}{1};     %header line entries
end
ch_label(strcmp(ch_label,'')) = [];     %remove empty columns
%--------------------------------------------------------------------------
%subfunction to separate files from directories
function [ave,med] = process_files(dir_struct)
%Pick the average and median files out of a directory listing.
%Input:     dir_struct = struct array with the fields name and isdir, as
%               returned by dir
%Output:    ave = row cell array of the file names starting with 'ave'
%           med = row cell array of the file names starting with 'med'
%Directories (including '.' and '..') are ignored.  Both outputs are empty
%when the listing is empty or holds no matching files.

ave = cell(1,0);
med = cell(1,0);
if isempty(dir_struct)  %missing or unreadable directory
    return
end

names = {dir_struct.name};              %file and dir names
list = names(~[dir_struct.isdir]);      %keep the files only
%filter for only the files we want; strncmp copes with names shorter than
%the prefix
ave = list(strncmp(list,'ave',3));      %list of average files
med = list(strncmp(list,'med',3));      %list of median files